// Trade-Policy Dynamics: Evidence from 60 Years of U.S.-China Trade
// Alessandria,  Khan, Khederlarian, Ruhl, and Steinberg

// inputs: 	Schott/imp_detl_yearly_89n.dta ... imp_detl_yearly_108n.dta
//			cty_codes.dta
//			mfa_hs8.dta
//			spread_hs8.dta

// outputs: 	us_imports_hs8.dta (intermediate), dataset_hs8.dta

// This code constructs a product-level data set from the HS data, 1989--2008.

// Start from an empty session and switch off variable-name abbreviation,
// so every variable must be referred to by its full name.
clear all
set varabbrev off

// Folder prefixes used to build the file paths below (both end in "/").
global dir_raw "../01 Raw data/"
global dir_int "../20 Intermediate files/"

// Build the stacked import panel. Each yearly Schott file (suffixes 89-108)
// is reduced to one row per hs8 x cty_code x year and written to a temporary
// file; the temporary files are then appended and deleted.
forvalues t = 89/108 {
	use "${dir_raw}Schott/imp_detl_yearly_`t'n.dta", clear

	// Product code: first 8 characters of the commodity string
	gen hs8 = substr(scommodity,1,8)

	rename con_qy1_yr q_jst
	rename con_val_yr v_jst

	// v_jst plus con_cha_yr (renamed charge_jst just below). Stored as double:
	// the default float type holds only about 7 significant digits, so large
	// values would be rounded before they are summed in the collapse.
	gen double vcif_jst = v_jst + con_cha_yr
	rename con_cha_yr charge_jst
	rename cal_dut_yr duties_jst

	// Discard records where both v_jst and q_jst are zero
	drop if v_jst==0 & q_jst==0
	destring cty_code, replace

	// Sum every *_jst variable within hs8 x cty_code x year
	collapse (sum) *_jst, by(hs8 cty_code year)

	// dir_int already ends in "/", so no extra separator is inserted; this
	// matches the paths used when the file is read back and erased below.
	save "${dir_int}temporary_`t'.dta", replace
}

// Stack the yearly files, erasing each temporary file once it has been read
use "${dir_int}temporary_89.dta", clear
erase "${dir_int}temporary_89.dta"
forvalues t = 90/108 {
	append using "${dir_int}temporary_`t'.dta"
	erase "${dir_int}temporary_`t'.dta"
}
save "${dir_int}us_imports_hs8.dta", replace

// Attach the country-code file. keep(master match) discards rows that appear
// only in the using file, and nogenerate suppresses the _merge indicator.
merge m:1 cty_code using "${dir_raw}cty_codes.dta", keep(master match) nogenerate

// Attach the MFA indicator; rows left with a missing phase are set to 0
merge m:1 hs8 using "${dir_int}mfa_hs8.dta", keep(master match) nogenerate
replace phase=0 if phase==.

// Attach the NTR gap by product and year
// merge m:1 hs8 year using "${dir_int}hs8_spread.dta"
merge m:1 hs8 year using "${dir_int}spread_hs8.dta", keep(master match) nogenerate

// Product-level NTR gap for 2001. Dividing by (year==2001) leaves s unchanged
// on 2001 rows and yields missing (division by zero) on all other rows, so
// the mean within hs8 is taken over the 2001 rows only.
bysort hs8: egen s_2001 = mean(s/(year==2001))

// Clean up some country details
rename cty_code cty

// ntr_cty = 0 for the ten country codes listed below and 1 for every other code.
// Note: Yugoslavia and Poland were exempted from NNTR treatment after 1962, Cuba is not identified
gen ntr_cty = cty!=4610 & cty!=5700 & cty!=4850 & cty!=4370 & cty!=4350 & cty!=5520 & cty!=5550 & cty!=4810 & cty!=4870 & cty!=5790

// fta = 1 for each listed country code in the years after its cutoff year
gen fta= (cty==5081 & year>1984) | (cty==5110 & year>2000) | (cty==6021 & year>2004) | (cty==5590 & year>2003) ///
| (cty==3370 & year>2003) | (cty==5250 & year>2005) | (cty==7140 & year>2005) | (cty==5230 & year>2005) | (cty==3330 & year>2007) ///
| (cty==2230 & year>2008) | (cty==2110 & year>2008) | (cty==2050 & year>2008) | (cty==2150 & year>2008) ///
| (cty==2190 & year>2008) | (cty==2470 & year>2008) | (cty==5800 & year>2009) ///
| (cty==3010 & year>2011) | (cty==2250 & year>2011) | (cty==5880 & year>2019) | (cty==1220 & year>1987) | (cty==2010 & year>1993)

// Single-country indicators
gen chn = cty==5700
gen ussr = cty==4610
gen czech = cty==4350
gen roma = cty==4850
// Hungary is code 4370 in the Census Schedule C classification; 4870 (used
// here previously) is Bulgaria. Both codes are excluded from ntr_cty above.
gen hunga = cty==4370
gen viet = cty==5520

// Balanced-sample flag: balanced_chn = 1 on every row of an hs8 product for
// which some chn==1 row belongs to a cty x hs8 pair whose earliest year in
// the data is 1990 or before.
bysort cty hs8: egen tempfirst = min(year)
gen tempearly = (chn==1) & (tempfirst<=1990)
bysort hs8: egen balanced_chn = max(tempearly)

// Remove the helper variables (everything whose name starts with "temp")
drop temp*

// Derived variables. The creation order is kept as is because it determines
// the column order of the saved file.
generate lv_jst = log(v_jst)
generate tariff_jst = duties_jst / v_jst
generate ltariff_jst = log(1 + tariff_jst)
generate shipping_jst = charge_jst / v_jst
generate lshipping_jst = log(1 + shipping_jst)
generate lgap_2001 = log(1 + s_2001)
generate lgap = log(1 + s)

// Numeric version of the hs8 string, a year>2000 indicator, and a numeric
// identifier for each cty x hs8 pair
encode hs8, generate(hs8n)
generate postWTO = (year > 2000)
egen id = group(cty hs8)

// Sort, move the identifying variables to the front, shrink storage types, save
sort cty hs8 year
order cty* hs8* id year
compress
save "${dir_int}dataset_hs8.dta", replace
